setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

library(patchwork)
library(tidyverse)
library(broom)
library(stargazer)


dat <- read.csv("Compliance survey_September 6, 2022_03.39.csv", na.strings = "")


dat <- dat[-c(1, 2),] # delete first rows
dat <- filter(dat, consent != "No, I do not wish to continue with this study") # delete no consent
dat <- filter(dat, Status != "Survey Preview") # delete previews
dat <- dat %>% select(-c(1:5), -c(Finished:RecordedDate), -c(RecipientLastName:consent))

## Get treatment race

dat_race <- dat %>% select(PROLIFIC_PID, contains("_race")) %>%
  pivot_longer(cols = contains("_race")) %>%
  filter(!is.na(value)) %>%
  mutate(truerace = substr(name, 1,1),
         truesex = substr(name, 2,2),
         name = gsub("_race", "", name),
         treat_picture = grepl("p", name),
         treat_name = grepl("[mf]n", name),
         treat_resume = grepl("r", name)) %>%
  mutate(race_correct = (truerace == "h" & value == "Hispanic or Latino")|
           (truerace == "a" & value == "Asian or Pacific Islander")|
           (truerace == "b" & value == "African American")|
           (truerace == "w" & value == "White")) %>%
  mutate(truerace = factor(truerace, levels = c("w", "b", "h", "a")),
         truesex = factor(truesex, levels = c("m", "f")))

## Make the barplots
plot_df = dat_race %>%
  group_by(truerace, truesex, treat_picture, treat_name, treat_resume) %>%
  summarize(Correct = mean(race_correct),
            Correct_sd = sd(race_correct)) %>%
  mutate(truerace = dplyr::recode(truerace, w = "White", b = "Black",
                           h = "Hispanic", a = "Asian"),
         truesex = dplyr::recode(truesex, m = "Male", f = "Female")) %>%
  select(Correct, "Race" = truerace, "Sex" = truesex,
         treat_picture, treat_name, treat_resume) %>%
  mutate(Treatment = paste(treat_picture, treat_name, treat_resume, sep="_")) %>%
  mutate( Treatment = dplyr::recode(Treatment,
                             FALSE_FALSE_TRUE = "Resume Only",
                             FALSE_TRUE_FALSE = "Name Only",
                             TRUE_FALSE_FALSE = "Picture Only",
                             FALSE_TRUE_TRUE = "Name and Resume",
                             TRUE_FALSE_TRUE = "Picture and Resume",
                             TRUE_TRUE_FALSE = "Picture and Name",
                             TRUE_TRUE_TRUE = "Picture, Name, and Resume"),
          Treatment = factor(Treatment,
                             levels = c("Resume Only", "Name Only",
                                        "Picture Only", "Name and Resume",
                                        "Picture and Resume", "Picture and Name",
                                        "Picture, Name, and Resume")))

plot_df_avg = plot_df  %>%
  group_by(Treatment) %>%
  summarize(tx_avg = mean(Correct),
            tx_sd = sd(Correct)/sqrt(n()))

plot_df = plot_df %>% left_join(plot_df_avg)
  

ggplot(plot_df, aes(y = Correct, x = Race, fill = Sex)) + 
  geom_bar(stat="identity", position=position_dodge()) + 
  geom_errorbar(aes(ymin=Correct-tx_sd, ymax=Correct+tx_sd), width=.2,
                position=position_dodge(.9)) +
  facet_wrap(~Treatment) + 
  geom_hline(aes(yintercept = tx_avg), color="black", lty = "dashed") +
  geom_text(aes(label = paste0("Average: ", round(tx_avg, digits=2))),
            y = 1, x = 3, cex=3.5) +
  theme_bw() + 
  scale_fill_manual(values = c("grey30", "grey70")) + 
  xlab("")
ggsave("compliance_barplots.pdf")


ggplot(plot_df, aes(y = Correct, x = Treatment)) + 
  geom_bar(stat = "identity", position = position_dodge()) + 
  facet_wrap(~Race + Sex) + 
  theme_bw() + 
  theme(axis.text.x = element_text(angle = 90))

## Describing the barplots
plot_df %>% group_by(treat_picture, treat_name, treat_resume) %>%
  summarize(mean(Correct))


## Read in covariate data
covars = read.csv("prolific_export_6304a475d4f1129467c0dd0c.csv")

dat_race2 = dat_race %>% left_join(covars %>% select(Participant.id, Age, Sex, Ethnicity.simplified, Employment.status),
                                  by = c("PROLIFIC_PID" = "Participant.id")) 
# regression table
m1 = lm(race_correct ~ truerace + truesex + treat_picture + treat_name + treat_resume, data = dat_race2)
summary(m1)

dat_race2$Age = as.numeric(dat_race2$Age)
dat_race2 = dat_race2 %>% filter(!Sex %in% c("CONSENT_REVOKED", "DATA_EXPIRED"),
                                 !Age %in% c("CONSENT_REVOKED", "DATA_EXPIRED"),
                                 !Ethnicity.simplified %in% c("CONSENT_REVOKED", "DATA_EXPIRED"),
                                 !Employment.status %in% c("CONSENT_REVOKED", "DATA_EXPIRED"))
dat_race2$Employment.status = relevel(as.factor(dat_race2$Employment.status), ref = c("Full-Time"))

m2 = lm(race_correct ~ truerace + truesex + treat_picture + treat_name + treat_resume + Age + Sex + Ethnicity.simplified + Employment.status, data = dat_race2)
summary(m2)

m3 = lm(race_correct ~ truerace*truesex + treat_picture + treat_name + treat_resume + Age + Sex + Ethnicity.simplified + Employment.status, data = dat_race2)
summary(m3)

stargazer(m1,m2,m3, header = FALSE, type = 'latex', digits=3)


## 2SLS
library(ivreg)

m_iv = ivreg(race_correct ~ ,
              data = dat_race2)


#### Do the same thing, but for class

dat_class <- dat %>% select(PROLIFIC_PID, contains("_class")) %>%
  pivot_longer(cols = contains("_class")) %>%
  filter(!is.na(value)) %>%
  mutate(truerace = substr(name, 1,1),
         truesex = substr(name, 2,2),
         name = gsub("_race", "", name),
         treat_picture = grepl("p", name),
         treat_name = grepl("[mf]n", name),
         treat_resume = grepl("r", name))  %>%
  mutate(truerace = factor(truerace, levels = c("w", "b", "h", "a")),
         truesex = factor(truesex, levels = c("m", "f")))


round(prop.table(table(dat_class$value,
                       dat_class$truerace,
                       dat_class$treat_picture),
                 margin=2),
      digits = 3)



dat_hire <- dat %>% select(PROLIFIC_PID, contains("_hireability")) %>%
  pivot_longer(cols = contains("_hireability")) %>%
  filter(!is.na(value)) %>%
  mutate(truerace = substr(name, 1,1),
         truesex = substr(name, 2,2),
         name = gsub("_hireability", "", name),
         treat_picture = grepl("p", name),
         treat_name = grepl("[mf]n", name),
         treat_resume = grepl("r", name)) %>%
  mutate(truerace = factor(truerace, levels = c("w", "b", "h", "a")),
         truesex = factor(truesex, levels = c("m", "f")),
         hire = factor(value, levels = c("Not at all", "Not very likely",
                                         "Slightly likely", "Somewhat likely",
                                         "Very likely")))

## Make the barplots
plot_df = dat_hire %>%
  group_by(truerace, truesex, treat_picture, treat_name, treat_resume) %>%
  summarize(Hire = mean(as.numeric(hire)),
            Hire_sd = sd(as.numeric(hire))) %>%
  mutate(truerace = dplyr::recode(truerace, w = "White", b = "Black",
                                  h = "Hispanic", a = "Asian"),
         truesex = dplyr::recode(truesex, m = "Male", f = "Female")) %>%
  select(Hire, "Race" = truerace, "Sex" = truesex,
         treat_picture, treat_name, treat_resume) %>%
  mutate(Treatment = paste(treat_picture, treat_name, treat_resume, sep="_")) %>%
  mutate( Treatment = dplyr::recode(Treatment,
                                    FALSE_FALSE_TRUE = "Resume Only",
                                    FALSE_TRUE_FALSE = "Name Only",
                                    TRUE_FALSE_FALSE = "Picture Only",
                                    FALSE_TRUE_TRUE = "Name and Resume",
                                    TRUE_FALSE_TRUE = "Picture and Resume",
                                    TRUE_TRUE_FALSE = "Picture and Name",
                                    TRUE_TRUE_TRUE = "Picture, Name, and Resume"),
          Treatment = factor(Treatment,
                             levels = c("Resume Only", "Name Only",
                                        "Picture Only", "Name and Resume",
                                        "Picture and Resume", "Picture and Name",
                                        "Picture, Name, and Resume")))

plot_df_avg = plot_df  %>%
  group_by(Treatment) %>%
  summarize(tx_avg = mean(Hire),
            tx_sd = sd(Hire)/sqrt(n()))

plot_df = plot_df %>% left_join(plot_df_avg)


ggplot(plot_df, aes(y = Hire, x = Race, fill = Sex)) + 
  geom_bar(stat="identity", position=position_dodge()) + 
  geom_errorbar(aes(ymin=Hire-tx_sd, ymax=Hire+tx_sd), width=.2,
                position=position_dodge(.9)) +
  facet_wrap(~Treatment) + 
  geom_hline(aes(yintercept = tx_avg), color="black", lty = "dashed") +
  geom_text(aes(label = paste0("Average: ", round(tx_avg, digits=2))),
            y = 3.2, x = 3, cex=3.5) +
  theme_bw() + 
  scale_fill_manual(values = c("grey30", "grey70")) + 
  xlab("")
ggsave("ATE_barplots.pdf")
